In [1]:
%pylab inline
from classy import *
In [2]:
# Load the digit images found under the relative path 'data/digits'.
# `image` is presumably supplied by the `from classy import *` star import — confirm.
images=image.load_images('data/digits')
In [3]:
# Convert the loaded images into a dataset object; later cells read its
# `.vectors` attribute (and `.targets` on the splits derived from it).
# Presumably one flattened pixel vector per image — TODO confirm.
data=image.images_to_vectors(images)
In [4]:
# Display the shape of the vector array (kept as the last expression so the
# notebook renders it as the cell output).
data.vectors.shape
Out[4]:
In [5]:
# Partition the dataset into a training part and a test part.
# NOTE(review): if `split` shuffles randomly, every accuracy below changes
# from run to run — confirm whether it accepts/needs a seed.
data_train,data_test=split(data)
In [6]:
# Baseline: Naive Bayes trained on the original (unreduced) vectors.
C = NaiveBayes()

# Timer calls bracket the fit; the second call's value is printed as the
# training time, so the order of these three statements matters.
timeit(reset=True)
C.fit(data_train.vectors, data_train.targets)
print("Training time: ", timeit())

# Score on the data the model was fit to, then on the other split.
print(
    "On Training Set:",
    C.percent_correct(data_train.vectors, data_train.targets),
)
print(
    "On Test Set:",
    C.percent_correct(data_test.vectors, data_test.targets),
)
In [7]:
# Create the PCA transform used by the next cells.
F=PCA(5) # keep only the top 5 components
In [8]:
# Dataset-level reduction: `fit_transform_data` is called on the training
# split, and the same PCA object `F` is then applied to the test split with
# `transform_data`.
data_train_reduced = F.fit_transform_data(data_train)
data_test_reduced = F.transform_data(data_test)
In [9]:
# Array-level variant of the previous cell: the same PCA object is called
# directly on the `.vectors` arrays instead of on the dataset objects.
# These two results are not referenced again in the visible cells.
train_vectors_reduced = F.fit_transform(data_train.vectors)
test_vectors_reduced = F.transform(data_test.vectors)
In [10]:
# Print the training-vector shape before and after the PCA reduction.
print(
    "shape train vectors:",
    data_train.vectors.shape,
)
print(
    "shape train vectors reduced:",
    data_train_reduced.vectors.shape,
)
In [11]:
# Refit the existing classifier `C` on the PCA-reduced training data.
# The timer calls bracket the fit, so statement order is significant.
timeit(reset=True)
C.fit(data_train_reduced.vectors, data_train_reduced.targets)
print("Training time: ", timeit())

# Score on the reduced training split, then on the reduced test split.
print(
    "On Training Set:",
    C.percent_correct(data_train_reduced.vectors, data_train_reduced.targets),
)
print(
    "On Test Set:",
    C.percent_correct(data_test_reduced.vectors, data_test_reduced.targets),
)
In [12]:
# Display the shape of the PCA object's `weights` attribute
# (kept as the last expression so it renders as the cell output).
F.weights.shape
Out[12]:
In [13]:
# Call the PCA object's own plot method with its default arguments.
F.plot()
You can specify which ones to plot by passing a list:
In [14]:
# Same plot, restricted by the list [2, 3, 4].
# NOTE(review): presumably these are component indices — confirm whether
# they are 0- or 1-based.
F.plot([2,3,4])
In [15]:
# Show the PCA object with `imshow`, passing an 8x8 shape.
# Presumably each component is reshaped to an 8x8 image — TODO confirm.
F.imshow(shape=(8,8))
Specify how many principal components (PCs) to try, and compare the test-set accuracy for each:
In [16]:
# Sweep over several PCA sizes and record the test-set score for each.
PCs = [2, 4, 6, 8, 10, 20, 40]
percent_correct = []

for n in PCs:
    # New PCA with n components: fit_transform_data on the training split,
    # then transform_data on the test split with the same object.
    F = PCA(n)
    data_train_reduced = F.fit_transform_data(data_train)
    data_test_reduced = F.transform_data(data_test)

    # New classifier for every setting so no state carries over between runs.
    C = NaiveBayes()
    C.fit(data_train_reduced.vectors, data_train_reduced.targets)
    percent_correct.append(
        C.percent_correct(data_test_reduced.vectors, data_test_reduced.targets)
    )

# Plot once, after the loop, when both lists have the same length.
# Note: F, C and the *_reduced datasets now hold the values from the last n.
plot(PCs, percent_correct, '-o')
xlabel('Number of PCs')
ylabel('Percent Correct on Test Data')
Out[16]:
This does exactly the same thing, but tries every odd number of PCs from 1 to 39 (1, 3, 5, 7, ..., 39).
In [17]:
# Same sweep as before, over every odd number of PCs: 1, 3, 5, ..., 39
# (arange excludes the stop value 40).
PCs = arange(1, 40, 2)
percent_correct = []

for n in PCs:
    # New PCA with n components: fit_transform_data on the training split,
    # then transform_data on the test split with the same object.
    F = PCA(n)
    data_train_reduced = F.fit_transform_data(data_train)
    data_test_reduced = F.transform_data(data_test)

    # New classifier for every setting so no state carries over between runs.
    C = NaiveBayes()
    C.fit(data_train_reduced.vectors, data_train_reduced.targets)
    percent_correct.append(
        C.percent_correct(data_test_reduced.vectors, data_test_reduced.targets)
    )

# Plot once, after the loop, when both sequences have the same length.
# Note: F, C and the *_reduced datasets now hold the values from the last n.
plot(PCs, percent_correct, '-o')
xlabel('Number of PCs')
ylabel('Percent Correct on Test Data')
Out[17]:
In [18]:
# Single run with a 10-component PCA.
F = PCA(10)

# Reduce both splits with the same PCA object.
data_train_reduced = F.fit_transform_data(data_train)
data_test_reduced = F.transform_data(data_test)

# Train a new classifier on the reduced training split and print its score
# on the reduced test split.
C = NaiveBayes()
C.fit(data_train_reduced.vectors, data_train_reduced.targets)
print(
    C.percent_correct(data_test_reduced.vectors, data_test_reduced.targets)
)
In [19]:
# Build feature-subset versions of the 10-component datasets by passing the
# index list [2, 3, ..., 9] to extract_features.
# NOTE(review): presumably this keeps features 2-9 and so drops the first
# two components — confirm against extract_features.
data_train_reduced_removed = extract_features(
    data_train_reduced,
    list(range(2, 10)),
)
data_test_reduced_removed = extract_features(
    data_test_reduced,
    list(range(2, 10)),
)

# Train a new classifier on the subset and print its test-split score.
C = NaiveBayes()
C.fit(
    data_train_reduced_removed.vectors,
    data_train_reduced_removed.targets,
)
print(
    C.percent_correct(
        data_test_reduced_removed.vectors,
        data_test_reduced_removed.targets,
    )
)
In [ ]: